library(sp)
library(gstat)
library(sf)
library(mapview)
library(tidyverse)
data(meuse)
head(meuse)
## x y cadmium copper lead zinc elev dist om ffreq soil lime
## 1 181072 333611 11.7 85 299 1022 7.909 0.00135803 13.6 1 1 1
## 2 181025 333558 8.6 81 277 1141 6.983 0.01222430 14.0 1 1 1
## 3 181165 333537 6.5 68 199 640 7.800 0.10302900 13.0 1 1 1
## 4 181298 333484 2.6 81 116 257 7.655 0.19009400 8.0 1 2 0
## 5 181307 333330 2.8 48 117 269 7.480 0.27709000 8.7 1 2 0
## 6 181390 333260 3.0 61 137 281 7.791 0.36406700 7.8 1 2 0
## landuse dist.m
## 1 Ah 50
## 2 Ah 30
## 3 Ah 150
## 4 Ga 270
## 5 Ah 380
## 6 Ga 470
ggplot(data = meuse) + geom_point(aes(x, y))
data(meuse.grid)
head(meuse.grid)
## x y part.a part.b dist soil ffreq
## 1 181180 333740 1 0 0.0000000 1 1
## 2 181140 333700 1 0 0.0000000 1 1
## 3 181180 333700 1 0 0.0122243 1 1
## 4 181220 333700 1 0 0.0434678 1 1
## 5 181100 333660 1 0 0.0000000 1 1
## 6 181140 333660 1 0 0.0122243 1 1
ggplot(data = meuse.grid) + geom_point(aes(x, y))
summary(meuse)
## x y cadmium copper
## Min. :178605 Min. :329714 Min. : 0.200 Min. : 14.00
## 1st Qu.:179371 1st Qu.:330762 1st Qu.: 0.800 1st Qu.: 23.00
## Median :179991 Median :331633 Median : 2.100 Median : 31.00
## Mean :180005 Mean :331635 Mean : 3.246 Mean : 40.32
## 3rd Qu.:180630 3rd Qu.:332463 3rd Qu.: 3.850 3rd Qu.: 49.50
## Max. :181390 Max. :333611 Max. :18.100 Max. :128.00
##
## lead zinc elev dist
## Min. : 37.0 Min. : 113.0 Min. : 5.180 Min. :0.00000
## 1st Qu.: 72.5 1st Qu.: 198.0 1st Qu.: 7.546 1st Qu.:0.07569
## Median :123.0 Median : 326.0 Median : 8.180 Median :0.21184
## Mean :153.4 Mean : 469.7 Mean : 8.165 Mean :0.24002
## 3rd Qu.:207.0 3rd Qu.: 674.5 3rd Qu.: 8.955 3rd Qu.:0.36407
## Max. :654.0 Max. :1839.0 Max. :10.520 Max. :0.88039
##
## om ffreq soil lime landuse dist.m
## Min. : 1.000 1:84 1:97 0:111 W :50 Min. : 10.0
## 1st Qu.: 5.300 2:48 2:46 1: 44 Ah :39 1st Qu.: 80.0
## Median : 6.900 3:23 3:12 Am :22 Median : 270.0
## Mean : 7.478 Fw :10 Mean : 290.3
## 3rd Qu.: 9.000 Ab : 8 3rd Qu.: 450.0
## Max. :17.000 (Other):25 Max. :1000.0
## NA's :2 NA's : 1
ggplot(data = meuse) + geom_point(aes(x, y, size=zinc))
class(meuse)
## [1] "data.frame"
class(meuse.grid)
## [1] "data.frame"
meuse2 <- st_as_sf(meuse, coords = c("x", "y"), crs = 28992)
meuse.grid2 <- st_as_sf(meuse.grid, coords = c("x", "y"),
crs = 28992)
class(meuse2)
## [1] "sf" "data.frame"
class(meuse.grid2)
## [1] "sf" "data.frame"
head(meuse2)
## Simple feature collection with 6 features and 12 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 181025 ymin: 333260 xmax: 181390 ymax: 333611
## Projected CRS: Amersfoort / RD New
## cadmium copper lead zinc elev dist om ffreq soil lime landuse dist.m
## 1 11.7 85 299 1022 7.909 0.00135803 13.6 1 1 1 Ah 50
## 2 8.6 81 277 1141 6.983 0.01222430 14.0 1 1 1 Ah 30
## 3 6.5 68 199 640 7.800 0.10302900 13.0 1 1 1 Ah 150
## 4 2.6 81 116 257 7.655 0.19009400 8.0 1 2 0 Ga 270
## 5 2.8 48 117 269 7.480 0.27709000 8.7 1 2 0 Ah 380
## 6 3.0 61 137 281 7.791 0.36406700 7.8 1 2 0 Ga 470
## geometry
## 1 POINT (181072 333611)
## 2 POINT (181025 333558)
## 3 POINT (181165 333537)
## 4 POINT (181298 333484)
## 5 POINT (181307 333330)
## 6 POINT (181390 333260)
head(meuse.grid2)
## Simple feature collection with 6 features and 5 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
## part.a part.b dist soil ffreq geometry
## 1 1 0 0.0000000 1 1 POINT (181180 333740)
## 2 1 0 0.0000000 1 1 POINT (181140 333700)
## 3 1 0 0.0122243 1 1 POINT (181180 333700)
## 4 1 0 0.0434678 1 1 POINT (181220 333700)
## 5 1 0 0.0000000 1 1 POINT (181100 333660)
## 6 1 0 0.0122243 1 1 POINT (181140 333660)
mapview(meuse2, zcol = "zinc", map.types = "CartoDB.Voyager")
mapview(meuse.grid2, map.types = "CartoDB.Voyager")
ggplot(meuse, aes(x=dist, y=zinc)) + geom_point()
ggplot(meuse, aes(x=dist, y=log(zinc))) + geom_point()
ggplot(meuse, aes(x=sqrt(dist), y=log(zinc))) + geom_point()
library(GGally)
ggpairs(meuse)
ggpairs(meuse[,3:9])
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_density()`).
lm <- lm(log(zinc)~sqrt(dist), meuse)
meuse$residuals <- residuals(lm)
meuse$fitted <- fitted(lm)
meuse$fitted2 <- predict(lm, meuse)
meuse$fitted.s <- predict(lm, meuse) - mean(predict(lm, meuse))
head(meuse)
## x y cadmium copper lead zinc elev dist om ffreq soil lime
## 1 181072 333611 11.7 85 299 1022 7.909 0.00135803 13.6 1 1 1
## 2 181025 333558 8.6 81 277 1141 6.983 0.01222430 14.0 1 1 1
## 3 181165 333537 6.5 68 199 640 7.800 0.10302900 13.0 1 1 1
## 4 181298 333484 2.6 81 116 257 7.655 0.19009400 8.0 1 2 0
## 5 181307 333330 2.8 48 117 269 7.480 0.27709000 8.7 1 2 0
## 6 181390 333260 3.0 61 137 281 7.791 0.36406700 7.8 1 2 0
## landuse dist.m residuals fitted fitted2 fitted.s
## 1 Ah 50 0.02907908 6.900438 6.900438 1.014661840
## 2 Ah 30 0.32712956 6.712531 6.712531 0.826754936
## 3 Ah 150 0.28533439 6.176134 6.176134 0.290357936
## 4 Ga 270 -0.33385786 5.882934 5.882934 -0.002841905
## 5 Ah 380 -0.05778586 5.652497 5.652497 -0.233278608
## 6 Ga 470 0.18211082 5.456244 5.456244 -0.429532005
library(viridis)
## Loading required package: viridisLite
ggplot(meuse, aes(x=x, y=y, col=fitted))+geom_point()+scale_color_viridis()
ggplot(meuse, aes(x=x, y=y, col=residuals))+geom_point()+scale_color_viridis()
ggplot(meuse, aes(x=x, y=y, col=fitted.s))+geom_point()+scale_color_viridis()
library(stars) |> suppressPackageStartupMessages()
library(gstat)
idw_result <- idw(zinc~1, meuse2, meuse.grid2)
## [inverse distance weighted interpolation]
idw_result
## Simple feature collection with 3103 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 178460 ymin: 329620 xmax: 181540 ymax: 333740
## Projected CRS: Amersfoort / RD New
## First 10 features:
## var1.pred var1.var geometry
## 1 633.6864 NA POINT (181180 333740)
## 2 712.5450 NA POINT (181140 333700)
## 3 654.1617 NA POINT (181180 333700)
## 4 604.4422 NA POINT (181220 333700)
## 5 857.2558 NA POINT (181100 333660)
## 6 755.5061 NA POINT (181140 333660)
## 7 667.7526 NA POINT (181180 333660)
## 8 604.4254 NA POINT (181220 333660)
## 9 1003.8711 NA POINT (181060 333620)
## 10 945.0017 NA POINT (181100 333620)
# Convert IDW output to a data frame
idw_df <- as.data.frame(idw_result)
head(idw_df)
## var1.pred var1.var geometry
## 1 633.6864 NA POINT (181180 333740)
## 2 712.5450 NA POINT (181140 333700)
## 3 654.1617 NA POINT (181180 333700)
## 4 604.4422 NA POINT (181220 333700)
## 5 857.2558 NA POINT (181100 333660)
## 6 755.5061 NA POINT (181140 333660)
colnames(idw_df) <- c("x", "y", "zinc_pred")
head(idw_df)
## x y zinc_pred
## 1 633.6864 NA POINT (181180 333740)
## 2 712.5450 NA POINT (181140 333700)
## 3 654.1617 NA POINT (181180 333700)
## 4 604.4422 NA POINT (181220 333700)
## 5 857.2558 NA POINT (181100 333660)
## 6 755.5061 NA POINT (181140 333660)
ggplot(data = meuse2) + geom_sf(data = idw_result, aes(color = var1.pred)) +
geom_sf() +
scale_color_viridis() + theme_bw()
hscat(log(zinc)~1, meuse2, (0:9)*100)
vc <- variogram(log(zinc) ~ 1, meuse2, cloud = TRUE)
plot(vc)
v <- variogram(log(zinc) ~ 1, data = meuse2)
plot(v)
vgm()
## short long
## 1 Nug Nug (nugget)
## 2 Exp Exp (exponential)
## 3 Sph Sph (spherical)
## 4 Gau Gau (gaussian)
## 5 Exc Exclass (Exponential class/stable)
## 6 Mat Mat (Matern)
## 7 Ste Mat (Matern, M. Stein's parameterization)
## 8 Cir Cir (circular)
## 9 Lin Lin (linear)
## 10 Bes Bes (bessel)
## 11 Pen Pen (pentaspherical)
## 12 Per Per (periodic)
## 13 Wav Wav (wave)
## 14 Hol Hol (hole)
## 15 Log Log (logarithmic)
## 16 Pow Pow (power)
## 17 Spl Spl (spline)
## 18 Leg Leg (Legendre)
## 19 Err Err (Measurement error)
## 20 Int Int (Intercept)
show.vgms(par.strip.text = list(cex = 0.75))
vg.fit1 <- vgm(psill = 0.5, model = "Sph",
range = 900, nugget = 0.1)
vg.fit1
## model psill range
## 1 Nug 0.1 0
## 2 Sph 0.5 900
plot(v, vg.fit1 , cutoff = 1000, cex = 1.5)
fv <- fit.variogram(object = v,
model = vgm(psill = 0.5, model = "Sph",
range = 900, nugget = 0.1))
fv
## model psill range
## 1 Nug 0.05066017 0.0000
## 2 Sph 0.59060556 897.0044
attr(fv, "SSErr")
## [1] 9.011194e-06
plot(v, fv, cex = 1.5)
fv2 <- fit.variogram(object = v,
model = vgm(psill = 0.6, model = "Sph",range = 900, nugget = 0.1))
fv2
## model psill range
## 1 Nug 0.05066522 0.0000
## 2 Sph 0.59061054 897.0412
attr(fv2, "SSErr")
## [1] 9.011195e-06
plot(v, fv2, cex = 1.5)
fv3 <- fit.variogram(object = v,
model = vgm(psill = 1, model = "Exp",range = 800, nugget = 0.1))
fv3
## model psill range
## 1 Nug 0.0000000 0.0000
## 2 Exp 0.7186519 449.7572
attr(fv3, "SSErr")
## [1] 1.628328e-05
plot(v, fv3, cex = 1.5)
library(ggplot2)
library(viridis)
k <- gstat(formula = log(zinc) ~ 1, data = meuse2, model = fv)
kpred <- predict(k, meuse.grid2)
## [using ordinary kriging]
head(kpred)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
## var1.pred var1.var geometry
## 1 6.499619 0.3198083 POINT (181180 333740)
## 2 6.622352 0.2520197 POINT (181140 333700)
## 3 6.505162 0.2729850 POINT (181180 333700)
## 4 6.387586 0.2955288 POINT (181220 333700)
## 5 6.764491 0.1779405 POINT (181100 333660)
## 6 6.635511 0.2022032 POINT (181140 333660)
ggplot() + geom_sf(data = kpred, aes(color = var1.pred)) +
geom_sf(data = meuse2) +
scale_color_viridis(name = "log(zinc)") + theme_bw()
ggplot() + geom_sf(data = kpred, aes(color = var1.var)) +
geom_sf(data = meuse2) +
scale_color_viridis(name = "variance") + theme_bw()
vg.dir <- variogram(log(zinc)~1, alpha = c(0, 45, 90, 135), meuse2)
plot(vg.dir)
The function call vgm(psill=0.59, model = “Sph”, range = 1200, nugget = 0.05, anis = c(45, 0.4)) in R creates a variogram model using the vgm() function from the gstat package. Here’s what each parameter means:
psill = 0.59: This is the partial sill, representing the difference between the sill (total variance) and the nugget effect. It defines the maximum semivariance minus the nugget.
model = “Sph”: The model type is Spherical, one of the commonly used variogram models. It describes how spatial correlation decreases with distance.
range = 1200: This is the range, the distance at which the spatial correlation drops to near zero (or 95% of the sill for the spherical model). Beyond this distance, observations are considered uncorrelated.
nugget = 0.05: The nugget effect, representing spatial variability at very small scales or measurement errors. It accounts for discontinuity at very short distances.
anis = c(45, 0.4): Specifies anisotropy, meaning the spatial correlation differs by direction.
45 indicates the direction (angle in degrees from the x-axis) where the correlation is strongest.
0.4 is the anisotropy ratio, meaning correlation decays faster in the perpendicular direction. A value of 0.4 means that in the perpendicular direction, the range is only 40% of the main direction’s range.
fit.ani <- vgm(psill=0.59, model = "Sph", range = 1200, nugget = 0.05, anis = c(45, 0.4))
plot(vg.dir, fit.ani)
This variogram model describes spatial dependence using a spherical variogram with anisotropy. The correlation extends up to 1200 units in the primary direction (45°), but only 480 units (1200 × 0.4) in the perpendicular direction. There is a small nugget effect (0.05), indicating slight short-scale variation or measurement noise.
Another way to detect directional dependence
vg.map <- variogram(log(zinc)~1, meuse2, map=T, cutoff=1000, width=100)
plot(vg.map, threshold=5)
vg.map <- variogram(log(zinc) ~ 1, meuse, map = TRUE, cutoff = 1000, width = 100)This code computes and visualizes the variogram map of the log-transformed zinc concentrations in the Meuse dataset.
variogram(log(zinc) ~ 1, meuse, …)
Computes the experimental semivariogram for log-transformed zinc concentrations. The ~ 1 formula means it calculates the variogram without considering covariates (i.e., only based on spatial distances).map = TRUE
Instead of returning a traditional one-dimensional variogram, this creates a spatial variogram map, which helps identify anisotropy (directional dependence of spatial variation).
cutoff = 1000
Defines the maximum lag distance (1,000 meters). Pairs of points separated by distances greater than this are ignored.
width = 100
Sets the bin width for grouping distances. Distance intervals of 100 meters are used when computing the variogram.
plot(vg.map, …)
Plots the spatial variogram map, where each pixel represents a directional semivariance value for a given spatial lag.
threshold = 5
Sets a threshold to filter high semivariance values to make the plot more interpretable.
log(zinc)~sqrt(dist)vgreg.est <- variogram(log(zinc)~sqrt(dist), meuse2)
plot(vgreg.est)
vgreg.fit <- fit.variogram(vgreg.est, vgm(0.2, "Sph", 800, 0.05))
vgreg.fit
## model psill range
## 1 Nug 0.07978675 0.0000
## 2 Sph 0.14902954 871.8744
plot(vgreg.est, vgreg.fit, cex = 1.5)
vgreg.dir <- variogram(log(zinc)~sqrt(dist), meuse2, alpha=c(0, 45, 90, 135))
plot(vgreg.dir)
plot(vgreg.dir, vgreg.fit, cex = 1.5)
The directional dependence is much less obvious in this case.
Another way to show this is variogram maps
vgreg.map <- variogram(log(zinc)~sqrt(dist), meuse2, map=T, cutoff=1000, width=100)
plot(vgreg.map, threshold=5)